County Level Correction
County Level Analysis for MA
library(tidyverse)
library(latex2exp)
library(patchwork)Base Functions
source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))get_melded <- function(alpha_mean = 0.9,
alpha_sd = 0.04,
alpha_bounds = NA,
beta_mean = .15,
beta_sd =.09,
beta_bounds = NA,
s_untested_mean = .025,
s_untested_sd = .0225,
s_untested_bounds = NA,
p_s0_pos_mean = .4,
p_s0_pos_sd = .1225,
p_s0_pos_bounds = NA,
nsamp = 1e3) {
given_args <- as.list(environment())
# cat("Arguments to get_melded:\n")
# print(given_args)
theta <- tibble(alpha = sample_gamma_density(nsamp,
mean = alpha_mean,
sd = alpha_sd,
bounds = alpha_bounds),
beta= sample_beta_density(nsamp,
mean = beta_mean,
sd = beta_sd,
bounds = beta_bounds),
P_S_untested = sample_beta_density(nsamp,
mean = s_untested_mean,
sd = s_untested_sd,
bounds = s_untested_bounds)) %>%
mutate(phi_induced = est_P_A_testpos(P_S_untested = P_S_untested,
alpha = alpha,
beta=beta))
# message(paste0("nrows of theta: ", nrow(theta)))
# theta contains values sampled from alpha, beta, P_S_untested, and M(theta) = phi_induced
# induced phi
phi <- theta$phi_induced
# approximate induced distribution via a density approximation
phi_induced_density <- density(x = phi, n = nsamp, adjust = 2, kernel = "gaussian")
indexes <- findInterval(phi, phi_induced_density$x)
phi_sampled_density <- phi_induced_density$y[indexes]
dp_s0_pos <- function(x) {
beta_density(x,
mean=p_s0_pos_mean,
sd = p_s0_pos_sd,
bounds=p_s0_pos_bounds)
}
weights <- (phi_sampled_density/ dp_s0_pos(phi))^(.5)
post_samp_ind <-sample.int(n=nsamp,
size=nsamp,
prob=1/weights,
replace=TRUE)
post_melding <- bind_cols(theta[post_samp_ind,],
P_A_testpos = phi[post_samp_ind]) %>%
select(-phi_induced)
return(list(post_melding = post_melding, pre_melding = theta))
# return(post_melding)
}
#' reformat for plot generation
reformat_melded <- function(melded_df,
theta_df,
nsamp,
p_s0_pos_mean,
p_s0_pos_sd,
p_s0_pos_bounds) {
melded_df_long <- melded_df %>%
pivot_longer(cols=everything()) %>%
mutate(type = "After Melding")
melded <- theta_df %>%
mutate(P_A_testpos = sample_beta_density(nsamp,
mean = p_s0_pos_mean,
sd = p_s0_pos_sd,
bounds = p_s0_pos_bounds)) %>%
pivot_longer(cols=everything()) %>%
mutate(type = ifelse(
name == "phi_induced",
"Induced", "Before Melding")) %>%
mutate(name = ifelse(name == "phi_induced",
"P_A_testpos",
name)) %>%
bind_rows(melded_df_long) %>%
mutate(name = case_when(
name == "alpha" ~"$\\alpha$",
name == "beta" ~"$\\beta$",
name == "P_A_testpos" ~ "$P(S_0|test+,untested)$",
name == "P_S_untested" ~ "$P(S_1|untested)$")
) %>%
mutate(name = factor(name,
levels = c(
"$\\alpha$",
"$\\beta$",
"$P(S_1|untested)$",
"$P(S_0|test+,untested)$")))
}
plot_melded <- function(melded, custom_title="", nsamp) {
p1 <- melded %>%
filter(name != "$P(S_0|test+,untested)$") %>%
ggplot(aes(x = value, fill = type)) +
geom_density(alpha = .5, show.legend=FALSE) +
facet_wrap(~name,
labeller = as_labeller(
TeX, default = label_parsed),
ncol = 3,
scales = "fixed") +
theme_bw() +
theme(
# axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
axis.title = element_text(size = 18),
axis.text.x = element_text(size = 10),
plot.title =element_text(size = 18,
margin =margin(0,0, .5,0, 'cm')),
strip.text = element_text(size = 16),
legend.text = element_text(size = 16)) +
labs(title = TeX(custom_title,bold=TRUE),
subtitle =paste0("Number of Samples: ", nsamp),
fill = "",
y = "Density") +
scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
guides(fill = guide_legend(keyheight = 2, keywidth = 2))
p2 <- melded %>%
filter(name == "$P(S_0|test+,untested)$") %>%
ggplot(aes(x = value, fill = type)) +
geom_density(alpha = .5) +
facet_wrap(~name,
labeller = as_labeller(
TeX, default = label_parsed),
ncol = 3,
scales = "fixed") +
theme_bw() +
theme(
# axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
axis.title = element_text(size = 18),
axis.text.x = element_text(size = 10),
plot.title =element_text(size = 18,
margin =margin(0,0, .5,0, 'cm')),
strip.text = element_text(size = 16),
legend.text = element_text(size = 18)) +
labs(
#title = paste0("Number of Samples: ", nsamp),
fill = "",
y = "Density") +
scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
guides(fill = guide_legend(keyheight = 2, keywidth = 2)) +
xlim(0,1)
p1 / p2 + plot_layout(nrow =2, widths = c(4,1))
}
plot_melded <- function(melded, custom_title="", nsamp) {
p1 <- melded %>%
filter(name != "$P(S_0|test+,untested)$") %>%
ggplot(aes(x = value, fill = type)) +
geom_density(alpha = .5, show.legend=FALSE) +
facet_wrap(~name,
labeller = as_labeller(
TeX, default = label_parsed),
ncol = 3,
scales = "fixed") +
theme_bw() +
theme(
# axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
axis.title = element_text(size = 18),
axis.text.x = element_text(size = 10),
plot.title =element_text(size = 18,
margin =margin(0,0, .5,0, 'cm')),
strip.text = element_text(size = 16),
legend.text = element_text(size = 16)) +
labs(title = TeX(custom_title,bold=TRUE),
subtitle =paste0("Number of Samples: ", nsamp),
fill = "",
y = "Density") +
scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
guides(fill = guide_legend(keyheight = 2, keywidth = 2))
p2 <- melded %>%
filter(name == "$P(S_0|test+,untested)$") %>%
ggplot(aes(x = value, fill = type)) +
geom_density(alpha = .5) +
facet_wrap(~name,
labeller = as_labeller(
TeX, default = label_parsed),
ncol = 3,
scales = "fixed") +
theme_bw() +
theme(
# axis.text.y = element_blank(),
# axis.ticks.y = element_blank(),
axis.title = element_text(size = 18),
axis.text.x = element_text(size = 10),
plot.title =element_text(size = 18,
margin =margin(0,0, .5,0, 'cm')),
strip.text = element_text(size = 16),
legend.text = element_text(size = 18)) +
labs(
#title = paste0("Number of Samples: ", nsamp),
fill = "",
y = "Density") +
scale_fill_manual(values = c("#5670BF", "#418F6A","#B28542")) +
guides(fill = guide_legend(keyheight = 2, keywidth = 2)) +
xlim(0,1)
p1 / p2 + plot_layout(nrow =2, widths = c(4,1))
}Initial Setup
Running analysis for ma.
Set Prior Parameters
# include slow plots comparing melded distributions for modified priors to original melded distribution
include_slow <- TRUE
# only include subset of rows for testing
testing <- FALSE
# whether to save results adj_v*.RDS (if testing, don't save results)
save <- TRUE
set.seed(123)
prior_params <- list(
alpha_mean = .95,
alpha_sd = 0.08,
alpha_bounds = NA,
# alpha_bounds = c(.8,1),
beta_mean = .15,
beta_sd =.09,
beta_bounds = NA,
# beta_bounds = c(0.002, 0.4),
s_untested_mean = .03,
s_untested_sd = .0225,
# s_untested_bounds = c(0.0018, Inf),
s_untested_bounds = NA,
p_s0_pos_mean = .4,
p_s0_pos_sd = .1225,
p_s0_pos_bounds = NA,
# p_s0_pos_bounds = c(.25, .7),
nsamp = 1e5)
corrected_sample_reps <- 1e3
# relevant for versions 2,3
beta_smoothing_span <- .2
# relevant for versions 3,4
s_untested_smoothing_span <- .2Read Data
state_name <- params$state
# for running when NOT knitting
if(!exists("state_name")) state_name <- "ma"
state_data_path <- paste0(here::here(),
"/data/county_level/",
state_name,
"/",
state_name, "_county_biweekly.RDS")
results_path <- paste0(
here::here(),
"/analysis/results/adj_biweekly_county/",
state_name, "/")
# objects before this point should not be removed
# when cleaning environment before running each version
do_not_remove <- c(ls(), "do_not_remove")
covid_county <- readRDS(state_data_path)# full descriptions by version
versions <- tibble(
version = c("v1", "v2", "v3", "v4"),
vlabel = c("Priors Do Not Vary by County and Date",
"Distribution for Beta is Centered at Empirical Value",
"Distributions for P(S_1|untested) and Beta Centered at Empirical Values",
"Distribution for P(S_1|untested) Centered at Empirical Value")
)Analysis
Version 1
Priors do not vary by state or date; melding performed once.
priors_version <- "v1"
melded <- do.call(get_melded, prior_params)
saveRDS(melded$post_melding,
paste0(
here::here(),
"/analysis/results/melded/constrained_v1.RDS"))
# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county
tictoc::tic()
corrected <- pmap_df(
# covid_county[1:4,],
covid_county,
~ {
process_priors_per_county(
priors = melded$post_melding,
county_df = list(...),
nsamp = prior_params$nsamp) %>%
generate_corrected_sample(., num_reps = corrected_sample_reps) %>%
summarize_corrected_sample()
})
tictoc::toc()## 402.929 sec elapsed
saveRDS(corrected,
paste0(results_path,
"adj_",
priors_version,
".RDS"))title <- paste0("Pre and Post Melding Distributions")
melded_long <- reformat_melded(melded_df = melded$post_melding,
theta_df = melded$pre_melding,
p_s0_pos_mean = prior_params$p_s0_pos_mean,
p_s0_pos_sd = prior_params$p_s0_pos_sd,
p_s0_pos_bounds = prior_params$p_s0_pos_bounds,
nsamp = prior_params$nsamp)
melded_long %>%
plot_melded(custom_title = title,
nsamp = prior_params$nsamp)Version 2
Time varying estimate of beta using the state-level estimate.
# remove objects from previous version
remove(list = ls()[!ls() %in% do_not_remove] )
covid_county <- readRDS(state_data_path)
source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))
priors_version <- "v2"
fb_symptoms <- readRDS(
paste0(
here::here(),
"/data/state_level/screeningpos_all_states.RDS")) %>%
filter(state == state_name)
beta_prior <- tibble(value = sample_beta_density(1e5,
mean = prior_params$beta_mean,
sd = prior_params$beta_sd,
bounds = prior_params$beta_bounds),
type = "Prior on Beta")################################################################################################
# compare empirical distribution across all dates to prior on beta
################################################################################################
fb_symptoms %>%
select(signal, date, value, stderr) %>%
pivot_wider(names_from = signal,
values_from = c(value,stderr)) %>%
mutate(beta_est = value_smoothed_wscreening_tested_positive_14d/
value_smoothed_wtested_positive_14d)%>%
select(value = beta_est) %>%
mutate(type = "Estimate for Beta\n(Screening Test Positivity/Test Positivity)") %>%
bind_rows(beta_prior) %>%
ggplot(aes(x=value, fill = type)) +
geom_density(alpha = .6) +
theme_bw() +
labs(title = "Comparing Prior for Beta to Empirical Distribution\n from COVID-19 Trends and Impact Survey Data Across All Dates",
subtitle = paste0("State: ", toupper(state_name)),
fill = "") +
theme_c()dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))
beta_est <- fb_symptoms %>%
select(signal, date, value, stderr) %>%
pivot_wider(names_from = signal,
values_from = c(value,stderr)) %>%
mutate(beta_estimate = value_smoothed_wscreening_tested_positive_14d
/value_smoothed_wtested_positive_14d) %>%
arrange(date) %>%
mutate(index = 1:nrow(.)) %>%
ungroup() %>%
# fill last weeks (missing from survey data) with rolling mean from previous
# 3 observations
mutate(rolled_mean = RcppRoll::roll_mean(beta_estimate, n = 3, na.rm = FALSE, fill = NA)) %>%
fill(rolled_mean, .direction = "down") %>%
mutate(beta_estimate = ifelse(is.na(beta_estimate), rolled_mean, beta_estimate))
smoothed_beta <- loess(beta_estimate~index, data= beta_est, span = beta_smoothing_span)
beta_est <- beta_est %>%
mutate(beta_estimate_smoothed = predict(smoothed_beta)) ################################################################################################
# look at empirical beta estimates across time
################################################################################################
beta_est %>%
ggplot(aes(x=date, y = beta_estimate)) +
geom_line() +
geom_point(alpha = .5) +
geom_line(aes(y=beta_estimate_smoothed), color = "darkred", size = 1.2) +
theme_c() +
labs(title = "Estimates of Beta across Time",
subtitle = paste0("State: ", toupper(state_name)),
y = "Estimate of Beta") +
scale_x_date(date_breaks = "2 months", date_labels = "%b %d")################################################################################################
# summarize to one observation per biweek
################################################################################################
symptoms <- beta_est %>%
select(date, beta_estimate_smoothed) %>%
left_join(dates) %>%
group_by(biweek) %>%
# get last date since observation for date corresponds to value for previous
# 2 weeks
slice_max(n=1, order_by = date) %>%
ungroup() %>%
select(-date)# COMPARE PREMELDING DISTRIUBTIONS
compare_priors <- symptoms %>%
# only need biweeks in dates data frame
filter(!is.na(biweek)) %>%
pmap_df(~ {
df <- tibble(...)
#glimpse(df)
#print(df$beta_estimate_smoothed)
tibble(empirical = sample_beta_density(
1e4,
mean = df$beta_estimate_smoothed,
sd = prior_params$beta_sd),
original_prior = sample_beta_density(
1e4,
mean = prior_params$beta_mean,
sd = prior_params$beta_sd),
biweek = df$biweek)
})
# compare (unconstrained) distributions
compare_priors %>%
pivot_longer(c(empirical,original_prior),
names_to = "Prior") %>%
mutate(biweek = as.factor(biweek)) %>%
ggplot(aes(x = value, y=fct_reorder(biweek,
as.numeric(biweek),
.desc=TRUE),
fill = Prior)) +
ggridges::geom_density_ridges(alpha = .6) +
labs(y = "Biweek",
title = "Comparing Distribution Centered\nat Empirical Estimate of Beta\nto Original Prior\n(Not Melded)") +
theme_c(legend.title = element_text(face="bold", size = 16))covid_county <- covid_county %>%
left_join(symptoms) %>%
# only have CTIS data starting at week 6
# filter out the beginning dates where beta_estimate_smoothed is NA
filter(!is.na(beta_estimate_smoothed))
priors_constrained_by_biweek <- covid_county %>%
select(biweek,beta_estimate_smoothed) %>%
arrange(biweek) %>%
# there will be more than one observation per county since
# beta estimates are at the state level
distinct() %>%
pmap_df(~ {
args <- list(...)
# message(paste0("before: ",prior_params$beta_mean))
prior_params$beta_mean <- args$beta_estimate_smoothed
# message(paste0("after: ",prior_params$beta_mean))
res <- do.call(get_melded, prior_params)
res$post_melding %>%
mutate(biweek= args$biweek)
})Compare Melded Priors to Original Across Time
melded_original <- readRDS(paste0(
here::here(),
"/analysis/results/melded/constrained_v1.RDS"))
# plot melded distributions and compare to original
original <- map_df(6:30, ~{
melded_original %>%
select(beta) %>%
mutate(source = "original_prior",
biweek = .x)})
priors_constrained_by_biweek %>%
mutate(source = "melded distribution\ncentered at empirical value") %>%
select(beta, biweek, source) %>%
bind_rows(original) %>%
mutate(biweek = as.factor(biweek)) %>%
ggplot(aes(x = beta, y=fct_reorder(biweek,
as.numeric(biweek),
.desc=TRUE),
fill = source)) +
ggridges::geom_density_ridges(alpha = .6) +
labs(y = "Biweek",
title = "Comparing Post-melding Distribution Centered\nat Empirical Estimate of Beta\nto Original Prior") +
theme_c(legend.title = element_text(face="bold", size = 16))# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county
tictoc::tic()
corrected <- pmap_df(
# covid_county[1:4,],
covid_county,
~ {
input_df <- list(...)
message(paste0(input_df$biweek, ", " , input_df$fips))
process_priors_per_county(
priors = priors_constrained_by_biweek[priors_constrained_by_biweek$biweek == input_df$biweek,],
county_df = input_df,
nsamp = prior_params$nsamp) %>%
generate_corrected_sample(.,
num_reps = corrected_sample_reps) %>%
summarize_corrected_sample()
})
tictoc::tic()saveRDS(corrected,
paste0(results_path,
"adj_",
priors_version,
".RDS"))Version 3
Time varying estimate of beta using the state-level estimate, time-varying estimate of \(P(S_1|untested)\) using the state-level estimate of the percentage of the population experiencing COVID-like illness.
# remove objects from previous version
remove(list = ls()[!ls() %in% do_not_remove] )
covid_county <- readRDS(state_data_path)
dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))
source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))
priors_version <- "v3"
fb_symptoms <- readRDS(
paste0(
here::here(),
"/data/state_level/screeningpos_all_states.RDS")) %>%
filter(state == state_name)emp_est <- fb_symptoms %>%
select(signal, date, value, state) %>%
pivot_wider(names_from = signal,
values_from = value) %>%
mutate(
beta_estimate = smoothed_wscreening_tested_positive_14d/smoothed_wtested_positive_14d,
s_untested_estimate = smoothed_wcli) %>%
select(date, beta_estimate, s_untested_estimate) %>%
arrange(date) %>%
mutate(index = 1:nrow(.)) %>%
# fill last weeks (missing from survey data) with rolling mean from previous
# 3 observations
mutate(rolled_mean_beta = RcppRoll::roll_mean(beta_estimate,
n = 3,
na.rm = FALSE,
fill = NA)) %>%
fill(rolled_mean_beta, .direction = "down") %>%
mutate(beta_estimate = ifelse(is.na(beta_estimate),
rolled_mean_beta,
beta_estimate))
smoothed_beta <- loess(beta_estimate~index, data= emp_est, span = beta_smoothing_span)
smoothed_s_untested <- loess(s_untested_estimate~index, data= emp_est, span = s_untested_smoothing_span)
emp_est <- emp_est %>%
mutate(beta_estimate_smoothed = predict(smoothed_beta),
s_untested_smoothed = predict(smoothed_s_untested)) # plot beta estimates across time
emp_est %>%
ggplot(aes(x=date, y = beta_estimate)) +
geom_line() +
geom_point(alpha = .5) +
geom_line(aes(y=beta_estimate_smoothed), color = "darkred", size = 1.2) +
theme_c() +
labs(title = "Estimates of Beta across Time",
subtitle = paste0("State: ", toupper(state_name)),
y = "Estimate of Beta")+
scale_x_date(date_breaks = "2 months", date_labels = "%b %d")# plot P(S_1|untested) estimates across time
emp_est %>%
ggplot(aes(x=date, y = s_untested_estimate)) +
geom_line() +
geom_point(alpha = .5) +
geom_line(aes(y=s_untested_smoothed), color = "darkred", size = 1.2) +
labs(title = TeX("Estimates of $P(S_1|untested)$ Across Time"),
y = "Percentage COVID-like Illness") +
theme_c()+
scale_x_date(date_breaks = "2 months", date_labels = "%b %d")#############################################################
# summarize to one observation per biweek
#############################################################
symptoms <- emp_est %>%
select(date, beta_estimate_smoothed, s_untested_smoothed) %>%
left_join(dates) %>%
group_by(biweek) %>%
# get last date since observation for date corresponds to value for previous
# 2 weeks
slice_max(n=1, order_by = date) %>%
ungroup() %>%
select(-date)
covid_county <- covid_county %>%
left_join(symptoms) %>%
# only have CTIS data starting at week 6
# filter out the beginning dates where beta_estimate_smoothed is NA
filter(!is.na(beta_estimate_smoothed))
priors_constrained_by_biweek <- covid_county %>%
select(biweek,beta_estimate_smoothed, s_untested_smoothed) %>%
arrange(biweek) %>%
# there will be more than one observation per county since
# beta estimates are at the state level
distinct() %>%
pmap_df(~ {
args <- list(...)
# message(paste0("before: beta_mean = ", prior_params$beta_mean),
# "\ns_untested_mean = ", prior_params$s_untested_mean)
prior_params$beta_mean <- args$beta_estimate_smoothed
prior_params$s_untested_mean <- args$s_untested_smoothed
# message(paste0("after: beta_mean = ", prior_params$beta_mean),
# "\ns_untested_mean = ", prior_params$s_untested_mean)
res <- do.call(get_melded, prior_params)
res$post_melding %>%
mutate(biweek= args$biweek)
})Compare Melded Priors to Original Across Time
melded_original <- readRDS(paste0(
here::here(),
"/analysis/results/melded/constrained_v1.RDS"))
# plot melded distributions and compare to original
original <- map_df(6:30, ~{
melded_original %>%
select(beta, P_S_untested) %>%
mutate(source = "original_prior",
biweek = .x)}) %>%
pivot_longer(c(beta,P_S_untested),
names_to = "variable")
priors_constrained_by_biweek %>%
select(biweek, P_S_untested, beta) %>%
pivot_longer(c(P_S_untested,beta), names_to = "variable") %>%
mutate(source = "melded distribution\ncentered at empirical value") %>%
bind_rows(original) %>%
mutate(biweek = as.factor(biweek),
variable = gsub("P_S_untested", "$P(S_1|untested)$", variable),
variable = gsub("beta", "$\\beta$", variable, fixed = TRUE)) %>%
ggplot(aes(x = value, y=fct_reorder(biweek,
as.numeric(biweek),
.desc=TRUE),
fill = source)) +
ggridges::geom_density_ridges(alpha = .6) +
facet_wrap(~variable, labeller= as_labeller(TeX,
default = label_parsed)) +
labs(y = "Biweek",
title = TeX("Comparing Post-melding Distributions Centered"),
subtitle = TeX("at Empirical Estimate of $\\beta$ and $P(S_1|untested)$ to Original Prior")) +
theme_c(legend.title = element_text(face="bold", size = 16),
plot.title = element_text(size = 20, hjust = .5),
plot.subtitle = element_text(size = 20, hjust = .5))rm(original)
rm(melded_original)# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county
tictoc::tic()
corrected <- pmap_df(
# covid_county[1:3,],
covid_county,
~ {
input_df <- list(...)
message(paste0(input_df$biweek, ", " , input_df$fips))
process_priors_per_county(
priors = priors_constrained_by_biweek[priors_constrained_by_biweek$biweek == input_df$biweek,],
county_df = input_df,
nsamp = prior_params$nsamp) %>%
generate_corrected_sample(.,
num_reps = corrected_sample_reps) %>%
summarize_corrected_sample()
})
tictoc::tic()saveRDS(corrected,
paste0(results_path,
"adj_",
priors_version,
".RDS"))Version 4
Only using time-varying estimate of \(P(S_1|untested)\) using the state-level estimate of the percentage of the population experiencing COVID-like illness.
# remove objects from previous version
remove(list = ls()[!ls() %in% do_not_remove] )
covid_county <- readRDS(state_data_path)
dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))
source(paste0(here::here(), "/analysis/base_functions/base_functions.R"))
priors_version <- "v4"
fb_symptoms <- readRDS(
paste0(
here::here(),
"/data/state_level/screeningpos_all_states.RDS")) %>%
filter(state == state_name)emp_est <- fb_symptoms %>%
select(signal, date, value, state) %>%
pivot_wider(names_from = signal,
values_from = value) %>%
mutate( s_untested_estimate = smoothed_wcli) %>%
select(date, s_untested_estimate) %>%
arrange(date) %>%
mutate(index = 1:nrow(.))
smoothed_s_untested <- loess(s_untested_estimate~index, data= emp_est, span = s_untested_smoothing_span)
emp_est <- emp_est %>%
mutate(s_untested_smoothed = predict(smoothed_s_untested)) # plot P(S_1|untested) estimates across time
emp_est %>%
ggplot(aes(x=date, y = s_untested_estimate)) +
geom_line() +
geom_point(alpha = .5) +
geom_line(aes(y=s_untested_smoothed), color = "darkred", size = 1.2) +
labs(title = TeX("Estimates of $P(S_1|untested)$ Across Time"),
y = "Percentage COVID-like Illness") +
theme_c()+
scale_x_date(date_breaks = "2 months", date_labels = "%b %d")#############################################################
# summarize to one observation per biweek
#############################################################
symptoms <- emp_est %>%
select(date, s_untested_smoothed) %>%
left_join(dates) %>%
group_by(biweek) %>%
# get last date since observation for date corresponds to value for previous
# 2 weeks
slice_max(n=1, order_by = date) %>%
ungroup() %>%
select(-date)
covid_county <- covid_county %>%
left_join(symptoms) %>%
# only take biweeks >= 6 since this is where we have CTIS data
filter(!is.na(s_untested_smoothed))
priors_constrained_by_biweek <- covid_county %>%
select(biweek, s_untested_smoothed) %>%
arrange(biweek) %>%
# there will be more than one observation per county since
# beta estimates are at the state level
distinct() %>%
pmap_df(~ {
args <- list(...)
# message(paste0("before: beta_mean = ", prior_params$beta_mean),
# "\ns_untested_mean = ", prior_params$s_untested_mean)
prior_params$beta_mean <- args$beta_estimate_smoothed
prior_params$s_untested_mean <- args$s_untested_smoothed
# message(paste0("after: beta_mean = ", prior_params$beta_mean),
# "\ns_untested_mean = ", prior_params$s_untested_mean)
res <- do.call(get_melded, prior_params)
res$post_melding %>%
mutate(biweek= args$biweek)
})Compare Melded Priors to Original Across Time
melded_original <- readRDS(paste0(
here::here(),
"/analysis/results/melded/constrained_v1.RDS"))
# plot melded distributions and compare to original
original <- map_df(6:30, ~{
melded_original %>%
select(P_S_untested) %>%
mutate(source = "original_prior",
biweek = .x)}) %>%
pivot_longer(c(P_S_untested),
names_to = "variable")
priors_constrained_by_biweek %>%
select(biweek, P_S_untested) %>%
pivot_longer(c(P_S_untested), names_to = "variable") %>%
mutate(source = "melded distribution\ncentered at empirical value") %>%
bind_rows(original) %>%
mutate(biweek = as.factor(biweek),
variable = gsub("P_S_untested", "$P(S_1|untested)$", variable)) %>%
ggplot(aes(x = value, y=fct_reorder(biweek,
as.numeric(biweek),
.desc=TRUE),
fill = source)) +
ggridges::geom_density_ridges(alpha = .6) +
labs(y = "Biweek",
title = TeX("Comparing Post-melding Distribution Centered"),
subtitle = TeX("at Empirical Estimate of $P(S_1|untested)$ to Original Prior")) +
theme_c(legend.title = element_text(face="bold", size = 16),
plot.subtitle = element_text(hjust = .5, size = 20),
plot.title = element_text(hjust = .5, size = 20)) rm(original)
rm(melded_original)# only use a few rows if testing
covid_county <- if(testing) covid_county[1:4,] else covid_county
tictoc::tic()
corrected <- pmap_df(
# covid_county[1:3,],
covid_county,
~ {
input_df <- list(...)
message(paste0(input_df$biweek, ", " , input_df$fips))
process_priors_per_county(
priors = priors_constrained_by_biweek[priors_constrained_by_biweek$biweek == input_df$biweek,],
county_df = input_df,
nsamp = prior_params$nsamp) %>%
generate_corrected_sample(.,
num_reps = corrected_sample_reps) %>%
summarize_corrected_sample()
})
tictoc::tic()saveRDS(corrected,
paste0(results_path,
"adj_",
priors_version,
".RDS"))